# LOAD PACKAGES
pacman::p_load(plyr, dplyr, stringr, rddtools, rdrobust, rddensity, 
               scales, foreign, haven, purrr, pbmcapply, devtools, 
               tidyr, ggplot2, lubridate, ggthemes, outreg, stargazer,  
               gtable, grid, gridExtra, cowplot, tidyquant, jtools, 
               modelsummary, ggsci)

############

# DEFINE PLOT PARAMETER
note1 <- "The outcome variable is XX. Estimates are based on XX. Columns differ with respect to XX. All estimations include the following set of control variables: age, binary for being married, number of children in household, binary for no educational degree, personal income, years of work experience, satisfaction with household income. Robust standard errors in parenthesis. * p < 0.05, ** p < 0.01, *** p < 0.001."
note2 <- ''

robust_temp <- 'HC1'

coef_map <- c('D' = 'ATT', 'age' = 'age', 'married' = 'married',
             'nchild' = '# children', 'nodegree' = 'w/o degree', 'income' = 'income (log)',
             'workexperience' = 'work exp.', 'satishhinc' = 'satisf. w/ HH-inc.',
             '(Intercept)' = 'intercept')

gof_map <- list(list("raw" = "nobs", "clean" = "N", "fmt" = 0),
                list("raw" = "adj.r.squared", "clean" = "adj. R2", "fmt" = 2),
                list("raw" = "bic", "clean" = "BIC", "fmt" = 0),
                list("raw" = "logLik", "clean" = "Log.Lik.", "fmt" = 0))

stars <- c('*' = 0.05, '**' = 0.01, '***' = 0.001)

# DEFINE CONTROL VARIABLES
cont1 <-  c("age") 
cont2 <-  c(cont1, 'married')
cont3 <-  c(cont2, 'nchild')
cont4 <-  c(cont3, 'nodegree')
cont5 <-  c(cont4, 'income')
cont6 <-  c(cont5, 'workexperience')
cont7 <-  c(cont6, 'satishhinc')

#####

# FUNCTION TO PLOT RDD DATA
plotRDDData <- function(ID){
  
  # DEFINE INPUTS
  outcome <- df$y[ID]
  running <- df$x[ID]
  regid <- df$regid[ID]
  th <- df$th_ori[ID]
  temp <- dat %>% 
    subset(., get(regid) == 1) %>% 
    mutate(y = rescale(get(outcome), c(-1, 1)), x = get(running)) %>%
    select(y, x) %>% subset(., complete.cases(.))
  
  # SAVE PLOT
  th_string <- th %>% round(., 3) %>% gsub('\\.', '', .) %>% gsub('\\-', 'min', .)
  fileName <- paste0('5plot/1raw/1_ESS/', regid, '_', outcome, '_', running, '_', th_string, '.jpeg')
  title <- paste(regid, th_string, sep = ' || ')
  jpeg(file = fileName)
  rdd_data(temp$y, temp$x, cutpoint = th) %>% 
    plot(., nbins = 50, xlab = running, ylab = outcome, main = title, ylim = c(-1, 1))
  dev.off()
}

#####

# FUNCTION FOR T-TEST
t.test2 <- function(reg_model1, reg_model2, nonparametric = F) {
  m0 = 0
  reg1 <- reg_model1
  reg1_coef <- reg1 %>% summary() %>% .[['coefficients']] %>% as.data.frame() %>% subset(., row.names(.) == 'D')
  n1 <- length(reg1$fitted.values)
  reg2 <- reg_model2
  reg2_coef <- reg2 %>% summary() %>% .[['coefficients']] %>% as.data.frame() %>% subset(., row.names(.) == 'D')
  n2 <- length(reg2$fitted.values)
  m1 <- reg1_coef[1,1]
  m2 <- reg2_coef[1,1]
  s1 <- reg1_coef[1,2]*(n1^0.5)
  s2 <- reg2_coef[1,2]*(n2^0.5)
  data1 <- scale(1:n1)*s1 + m1
  data2 <- scale(1:n2)*s2 + m2
  dat <- t.test(data1, data2)
  return(dat)
}

#####

# FUNCTION TO EXTRACT ESTIMATES FROM RDD OBJECT
extractRegResultsForPlot <- function(reg, robust_temp = 'HC1', exponentiate = F){
  names(reg) <- unlist(lapply(reg, function(x) x$param))
  a <- reg %>% 
    modelsummary(., vcov = robust_temp, exponentiate = exponentiate, 
                 estimate = "{estimate},{std.error}", fmt = 10, stars = stars, 
                 coef_map = coef_map, notes = list(note2, note1), output = 'data.frame') %>%
    subset(., term == 'ATT' & statistic == 'estimate') %>% select(-c(part, term, statistic)) %>%
    gather(., param, val) %>%
    separate(., col = 'val', sep = c('\\,'), into = c('estimate', 'stderror')) %>%
    mutate_at(vars(matches('estimate|stderror')), as.numeric) %>%
    mutate(zval95 = qnorm(0.025,lower.tail = F), zval99 = qnorm(0.005,lower.tail = F)) %>%
    mutate(cil95 = estimate-(zval95*stderror), ciu95 = estimate+(zval95*stderror),
           cil99 = estimate-(zval99*stderror), ciu99 = estimate+(zval99*stderror))
  return(a)
}

#####

# FUNCTION TO PLOT ATT FOR BANDWIDTH ESTIMATIONS
createATTPlotBW <- function(temp, ybreaks, ylimits = c(-0.5, 0.5), yintercept = 0){
  if(ylimits[2]>=1){
    ylabels <- scales::label_number()
  }else{
    ylabels <- scales::percent_format(accuracy = 1)
  }
  temp <- temp %>% mutate(param = as.numeric(param)/12)
  ggplot() +
    geom_point(data = temp, aes(x = param, y = estimate), size = 3) +
    geom_errorbar(data = temp, aes(x = param, y = estimate, ymin = cil95, ymax = ciu95), size = 1.5, width = 0) +
    geom_errorbar(data = temp, aes(x = param, y = estimate, ymin = cil99, ymax = ciu99), size = 0.75, width = 0) +
    geom_hline(yintercept = yintercept) +
    scale_y_continuous(labels = ylabels, breaks = ybreaks, limits = ylimits) +
    scale_x_continuous(breaks = seq(5, 20, 1)) +
    xlab('bandwidth size (in years)') + ylab('treatment effect') + 
    theme_bw() +
    theme(legend.position = 'bottom', legend.title = element_blank())
}

#####

# FUNCTION TO PLOT ATT FOR DURATION ANALYSIS
createATTPlotDuration <- function(temp, ybreaks = seq(-0.5, 0.5, 0.05), ylimits = c(-0.2, 0.2), yintercept = 0){
  if(ylimits[2]>1){
    ylabels <- scales::label_number()
  }else{
    ylabels <- scales::percent_format(accuracy = 1)
  }
  ggplot() +
    geom_point(data = temp, aes(x = param, y = estimate), size = 3) +
    geom_errorbar(data = temp, aes(x = param, y = estimate, ymin = cil95, ymax = ciu95), size = 1.5, width = 0) +
    geom_errorbar(data = temp, aes(x = param, y = estimate, ymin = cil99, ymax = ciu99), size = 0.75, width = 0) +
    geom_hline(yintercept = yintercept) +
    geom_rect(aes(xmin = "2009-2011", xmax = "2012-2014", ymin = -Inf, ymax = Inf), fill = 'gray70', alpha = 0.4) +
    scale_y_continuous(labels = ylabels, breaks = ybreaks, limits = ylimits) +
    xlab('survey period') + ylab('treatment effect') + 
    theme_bw() +
    theme(legend.position = 'bottom', legend.title = element_blank(),
          axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
}

#####

# FUNCTION TO DEFINE DATASET DURATION FOR DURATION ANALYSIS
defineDatasetDuration <- function(){
  start <- years[j]
  end <- years[j]+2
  df_temp <- df_dur %>% 
    subset(., syear %in% c(start:end)) %>%
    select(x, y, pid, hid, all_of(contVarBase)) %>%
    subset(., complete.cases(.)) %>%
    group_by(pid) %>% mutate(n_pid = n()) %>% ungroup() %>% subset(., n_pid >= 2)
  dat_temp1 <- rdd_data(df_temp$y, df_temp$x, covar = df_temp[,contVarBase], cutpoint = cutoff)
  return(dat_temp1)
}

#####

# FUNCTION FOR ROBUST CLUSTERED STANDARD ERRORS
vcovCL_custom <- function(model, cluster = model$RDDslot$rdd_data$pid, type = "HC1", ...) {
  vcovCL(model, cluster = cluster, type = type, ...)
}

#####

# FUNCTION TO COUNT OBSERVATION IN RDD
# source_url('https://raw.githubusercontent.com/bquast/rddtools/master/R/get_methods.R')
rdd_reg_lm_nobs <- function(x, bw = bw, ...) {
  cutpoint <- attr(x, "cutpoint")
  x_var <- getOriginalX(x)
  n_left <- sum(x_var >= cutpoint - bw & x_var < cutpoint)
  n_right <- sum(x_var >= cutpoint & x_var <= cutpoint + bw)
  nobs <- sum(n_left + n_right)
  return(nobs)
}
